The PGC3 MDD study is including the following analyses to identify genes associated with MDD:
Show code
library(biomaRt)
library(GenomicRanges)
# Insert nearest gene information
gene_attributes = c('ensembl_gene_id', 'hgnc_symbol', 'external_gene_name','chromosome_name','start_position','end_position')
# GRCh37 for position
ensembl37 = useEnsembl(biomart="ensembl", dataset="hsapiens_gene_ensembl", GRCh=37)
Genes37<-getBM(attributes=gene_attributes, mart = ensembl37)
# remove alternative contigs
Genes37 <- Genes37[Genes37$chromosome_name %in% c(as.character(1:22), 'X'),]
# remove duplicated entries
Genes37$cpid <- with(Genes37, paste0(chromosome_name, ':', start_position, '-', end_position))
Genes37 <- Genes37[!duplicated(Genes37$ensembl_gene_id),]
Genes37 <- Genes37[!duplicated(Genes37$cpid),]
Genes37 <- Genes37[!duplicated(Genes37$external_gene_name),]
# GRCH38 for gene names
ensembl38 = useEnsembl(biomart="ensembl", dataset="hsapiens_gene_ensembl")
Genes38<-getBM(attributes=gene_attributes, mart = ensembl38)
# remove alternative contigs
Genes38 <- Genes38[Genes38$chromosome_name %in% c(as.character(1:22), 'X'),]
# remove duplicated entries
Genes38$cpid <- with(Genes38, paste0(chromosome_name, ':', start_position, '-', end_position))
Genes38 <- Genes38[!duplicated(Genes38$ensembl_gene_id),]
Genes38 <- Genes38[!duplicated(Genes38$cpid),]
#no_gene_name38 <- which(Genes38$external_gene_name == "")
#Genes38$external_gene_name[no_gene_name38] = Genes38$cpid[no_gene_name38]
#Genes38 <- Genes38[!duplicated(Genes38$external_gene_name),]
# 37 positions with 38 gene names
Genes <- merge(Genes37, Genes38[,c('ensembl_gene_id', 'chromosome_name', 'external_gene_name', 'hgnc_symbol', 'start_position', 'end_position')], by=c('ensembl_gene_id'), all=TRUE, suffix=c('.37', '.38'))
# copy over build 37 gene name if it is missing in 38
coalesce_gene_name <- which(is.na(Genes$external_gene_name.38))
Genes$external_gene_name = with(Genes, ifelse(is.na(external_gene_name.38) | external_gene_name.38 == "", yes=external_gene_name.37, no=external_gene_name.38))
##########
# Nearest gene
##########
library(data.table)
# Read in GWAS results
# Currently we are using results only for lead indepdendant associations from COJO
# I think this is fine
indep_hits<-fread(here::here('docs/tables/meta_snps_full_eur.cojo.txt'))
# Link SNPs to nearest gene
window<-50000
for(i in 1:nrow(indep_hits)){
Genes_i<-Genes[which(Genes$start_position.37 < (indep_hits$BP[i] + window) & Genes$end_position.37 > (indep_hits$BP[i] - window) & Genes$chromosome_name.37 == indep_hits$CHR[i]),]
if(nrow(Genes_i) != 0){
gene_string<-NULL
for(j in 1:nrow(Genes_i)){
if(indep_hits$BP[i] > Genes_i$start_position.37[j] & indep_hits$BP[i] < Genes_i$end_position.37[j]){
gene_string<-rbind(gene_string, data.frame(ID=Genes_i$external_gene_name[j],
ENSGID=Genes_i$ensembl_gene_id[j],
Dist=0,
Pos=NA))
}
if(indep_hits$BP[i] < Genes_i$start_position.37[j]){
gene_string<-rbind(gene_string, data.frame(ID=Genes_i$external_gene_name[j],
ENSGID=Genes_i$ensembl_gene_id[j],
Dist=abs(indep_hits$BP[i] - Genes_i$start_position.37[j]),
Pos='-'))
}
if(indep_hits$BP[i] > Genes_i$end_position.37[j]){
gene_string<-rbind(gene_string, data.frame(ID=Genes_i$external_gene_name[j],
ENSGID=Genes_i$ensembl_gene_id[j],
Dist=abs(indep_hits$BP[i] - Genes_i$end_position.37[j]),
Pos='+'))
}
}
gene_string<-gene_string[order(gene_string$Dist),]
indep_hits$NearestGene[i]<-as.character(gene_string$ID[1])
indep_hits$NearestENSG[i]<-as.character(gene_string$ENSGID[1])
} else {
indep_hits$NearestGene[i]<-NA
indep_hits$NearestENSG[i]<-NA
}
}
##########
# SNP-finemapping
##########
# Read in finemapping results from Joni. This table shows genes implicated by the finemapping results, by a gene containing the entire 95% credible set.
finemap<-fread(here::here('docs/tables/finemapping/Locus_FineMapping_Full_Results.csv'))
# parse out gene names
finemap_genes<-unlist(strsplit(finemap$High.Confidence.Genes.Names, ','))
finemap_genes<-finemap_genes[finemap_genes != '-']
# parse out ensembl ids
finemap_geneids<-unlist(strsplit(finemap$High.Confidence.Genes.IDs, ','))
finemap_geneids<-finemap_geneids[finemap_geneids != '-']
finemap_geneids <- sapply(strsplit(finemap_geneids, '\\.'), function(g) g[[1]])
##########
# FastBAT
##########
# Read in FastBAT results
fastbat<-fread(here::here('docs/tables/fastBAT/mdd_fastbat_AllgeneMatrix.gene.fastbat'))
##########
# H-MAGMA
##########
hmagma<-fread(here::here('docs/tables/H-MAGMA/PGC_MDD_Results_mar2022.csv'))
# Exclude astrocytes
hmagma_noAstr<-hmagma[hmagma$Analysis != 'Astrocytes',]
# Apply FDR correction across all tests
hmagma_noAstr$P.FDR<-p.adjust(hmagma_noAstr$P, method = 'fdr')
hmagma_noAstr<-merge(hmagma_noAstr, Genes, by.x='GENE', by.y='ensembl_gene_id')
##########
# TWAS
##########
twas<-fread(here::here('docs/tables/twas/PGC_MDD3_twas_AllTissues_GW.txt'))
twas$chromosome_name <- as.character(twas$CHR)
twas$twas_id <- 1:nrow(twas)
# merge by scaffold (exact overlap)
twas_gr <- with(twas, GRanges(seqnames=chromosome_name, ranges=IRanges(start=P0, end=P1)))
genes_gr <- with(Genes[!is.na(Genes$chromosome_name.37),], GRanges(seqnames=chromosome_name.37, ranges=IRanges(start=start_position.37, end=end_position.37)))
twas_genes_overlaps <- findOverlaps(twas_gr, genes_gr, type='equal')
twas_scaffolds <- twas[twas_genes_overlaps@from,]
twas_scaffolds$ensembl_gene_id <- Genes$ensembl_gene_id[twas_genes_overlaps@to]
twas_scaffolds$external_gene_name.37 <- Genes$external_gene_name.37[twas_genes_overlaps@to]
# merge unmatched twas results by symbol and partial overlap
twas_unmatched <- twas[!twas$twas_id %in% twas_scaffolds$twas_id,]
twas_unmatched_gr <- with(twas_unmatched, GRanges(seqnames=chromosome_name, ranges=IRanges(start=P0, end=P1)))
# find overlaps
twas_unmatched_genes_overlaps <- findOverlaps(twas_unmatched_gr, genes_gr, maxgap=window)
# merge in gene names
twas_symbols <- twas_unmatched[twas_unmatched_genes_overlaps@from,]
twas_symbols$ensembl_gene_id <- Genes$ensembl_gene_id[twas_unmatched_genes_overlaps@to]
twas_symbols$external_gene_name.37 <- Genes$external_gene_name.37[twas_unmatched_genes_overlaps@to]
# keep matches with symbols match
twas_matched_symbols <- twas_symbols[which(twas_symbols$ID == twas_symbols$external_gene_name.37),]
twas_genes <- rbind(twas_scaffolds, twas_matched_symbols)
twas_sig<-twas_genes[twas_genes$TWAS.P < 3.685926e-08,]
##########
# Colocalisation
##########
coloc<-read.csv(here::here('docs/tables/twas/PGC_MDD3_TWAS_colocalisation.csv'))
coloc_sig<-coloc[coloc$COLOC.PP4 > 0.8,]
coloc_sig <- merge(coloc_sig, Genes[,c('ensembl_gene_id', 'external_gene_name.37')], by.x='ID', by.y='external_gene_name.37', all.x=TRUE)
##########
# FOCUS
##########
focus<-read.csv(here::here('docs/tables/twas/PGC_MDD3_TWAS.TWSig.FOCUS.results.csv'))
fusion <- fread(here::here("docs/tables/twas/PGC_MDD3_twas_AllTissues_TWSig_CLEAN.txt"))
fusion<-fusion[,c('PANEL','PANEL_clean_short','PANEL_clean'), with=F]
fusion<-fusion[!duplicated(fusion),]
focus<-merge(focus, fusion, by.x='SNP.weight.Set', by.y='PANEL_clean_short')
focus_sig<-focus[focus$FOCUS_pip > 0.5,]
focus_sig <- merge(focus_sig, Genes[,c('ensembl_gene_id', 'external_gene_name.37')], by.x='ID', by.y='external_gene_name.37', all.x=TRUE)
##########
# Expression analysis based high confidence genes
##########
expression_highconf_res<-fread(here::here('docs/tables/twas/PGC3_MDD_TWAS_HighConf_results.csv'))
expression_highconf_res <- merge(expression_highconf_res, Genes[,c('ensembl_gene_id', 'external_gene_name.37')], by.x='ID', by.y='external_gene_name.37', all.x=TRUE)
##########
# SMR
##########
# Read in the SMR results
smr_res<-list()
smr_res[['eQTLGen_Blood']]<-fread(here::here('docs/tables/twas/eqtlgen_smr_res_GW_withIDs.csv'))
names(smr_res[['eQTLGen_Blood']])[names(smr_res[['eQTLGen_Blood']]) == 'GeneSymbol']<-'HGNCName'
smr_res[['eQTLGen_Blood']]$eQTL_source<-'eQTLGen_Blood'
tissue<-c("Basalganglia","Cerebellum","Cortex","Hippocampus","Spinalcord")
for(tissue_i in tissue){
smr_res[[tissue_i]]<-fread(here::here(paste0('docs/tables/twas/metabrain_',tissue_i,'_smr_res_GW_withIDs.csv')))
smr_res[[tissue_i]]$eQTL_source<-paste0('MetaBrain_',tissue_i)
}
smr_res_dat<-do.call(rbind, smr_res)
smr_res_dat$p_SMR_fdr_all<-p.adjust(smr_res_dat$p_SMR, method="fdr")
smr_res_dat_sig<-smr_res_dat[smr_res_dat$p_SMR_fdr_all < 0.05,]
##########
# HEIDI
##########
heidi<-smr_res_dat_sig[smr_res_dat_sig$p_HEIDI > 0.05,]
##########
# PWAS
##########
# For no just read in the ROSMAP results
pwas<-NULL
for(i in 1:22){
if(i != 6){
pwas<-rbind(pwas, fread(here::here(paste0('docs/tables/pwas/PGC_MDD3_pwas_rosmap_chr',i))))
} else {
pwas<-rbind(pwas, fread(here::here(paste0('docs/tables/pwas/PGC_MDD3_pwas_rosmap_chr',i))))
pwas<-rbind(pwas, fread(here::here(paste0('docs/tables/pwas/PGC_MDD3_pwas_rosmap_chr',i,'.MHC'))))
}
}
pwas$TWAS.P.FDR<-p.adjust(pwas$TWAS.P)
pwas$ID<-gsub('\\..*','', pwas$ID)
# Read in PWAS and SMR results for all significant ROSMAP PWAS assoc results
pwas_smr_rosmap_banner<-fread(here::here('docs/tables/pwas/rosmap_banner_pwas_smr_results.csv'))
pwas_smr_rosmap_banner <- merge(pwas_smr_rosmap_banner, Genes[,c('ensembl_gene_id', 'external_gene_name.37')], by.x='ID', by.y='external_gene_name.37', all.x=TRUE)
########
# PsyOPS
########
psyops <- fread(here::here('docs/tables/psyops/psyops_full_eur.cojo.txt'))
psyops$psy_id <- 1:nrow(psyops)
psyops_genes37 <- merge(psyops, Genes[,c('ensembl_gene_id', 'external_gene_name.37')], by.x='gene', by.y='external_gene_name.37')
psyops_genes38 <- merge(psyops, Genes[,c('ensembl_gene_id', 'external_gene_name.38')], by.x='gene', by.y='external_gene_name.38')
psyops_genesen <- merge(psyops, Genes[,c('ensembl_gene_id','external_gene_name')], by.x='gene', by.y='ensembl_gene_id')
psyops_genesen$ensembl_gene_id <- psyops_genesen$gene
psyops_genesen$external_gene_name <- NULL
psyops_genes <- unique(rbind(psyops_genes37, psyops_genes38, psyops_genesen))
psyops_genes <- psyops_genes[!duplicated(psyops_genes$psy_id),]
psyops_select <- psyops_genes[with(psyops_genes, which(extreme_pLI | brain_enriched_expression | neurodevelopmental_disorder)),]
psyops_prioritised<-NULL
for(i in unique(psyops_select$lead_variant)){
tmp<-psyops_select[psyops_select$lead_variant == i,]
tmp<-tmp[tmp$PsyOPS_score == max(tmp$PsyOPS_score),]
tmp<-tmp[tmp$distance == min(tmp$distance),]
psyops_prioritised<-rbind(psyops_prioritised, tmp)
}
This plot will show the number of genes returned by each analysis and the overlap of each analysis
Show code
# Create data.frame listing genes with T/F indicating whether it was supported by each analysis
gene_overlap<-list()
gene_overlap[['Fine-mapping']]<-finemap_geneids
gene_overlap[['Expression']]<-expression_highconf_res$ensembl_gene_id
gene_overlap[['Protein']]<-pwas_smr_rosmap_banner$ensembl_gene_id[which(pwas_smr_rosmap_banner$banner_replicated & pwas_smr_rosmap_banner$rosmap.causal & pwas_smr_rosmap_banner$smr.causal)]
gene_overlap[['Nearest Gene']]<-indep_hits$NearestENSG[!is.na(indep_hits$NearestENSG)]
gene_overlap[['fastBAT']]<-fastbat$Gene[fastbat$Pvalue < 2e-6]
gene_overlap[['H-MAGMA']]<-unique(hmagma_noAstr$GENE[hmagma_noAstr$P.FDR < 3.73e-6])
gene_overlap[['PsyOPS']]<-psyops_prioritised$ensembl_gene_id
library(UpSetR)
png(here::here('docs/figures/gene_consensus_upset_dense.png'), units = 'px', res=300, height=1500, width=2500)
upset(fromList(gene_overlap), nsets=10, order.by = "freq")
dev.off()
## png
## 2
gene_overlap_highconf <- gene_overlap[c('Fine-mapping','Expression', 'Protein', 'PsyOPS')]
png(here::here('docs/figures/gene_consensus_upset_highconf.png'), units = 'px', res=300, height=1400, width=1500)
upset(fromList(gene_overlap_highconf), nsets=10, order.by = "freq")
dev.off()
## png
## 2
Show UpSet plot of genes across all analyses
High-confidence genes
Show UpSet plot of genes across high-confidence analyses
High-confidence genes
Show code
# Define high confidence associations
# - Genes implicated by finemapping
# - Genes implicated by TWAS, colocalisation and FOCUS from any panel
# - Genes implicated by PWAS, SMR, coloc and HEIDI in ROSMAP and Banner
high_conf<-unique(unlist(gene_overlap[c('Fine-mapping','Expression', 'Protein')]))
ENSGIDs <- Genes[,c('ensembl_gene_id', 'external_gene_name')]
names(ENSGIDs) <- c('ENSGID', 'ID')
high_conf_tab <- merge(data.frame(ENSGID=high_conf), ENSGIDs)
# finemap
# Joni said he used the same gene list as David Howard (fastBAT)
high_conf_tab$finemap<-'NA'
for(i in 1:nrow(high_conf_tab)){
if(high_conf_tab$ENSGID[i] %in% fastbat$Gene | high_conf_tab$ENSGID[i] %in% finemap_geneids){
if(high_conf_tab$ENSGID[i] %in% finemap_geneids){
high_conf_tab$finemap[i]<-'Sig'
} else {
high_conf_tab$finemap[i]<-'Non-Sig'
}
}
}
# expression
high_conf_tab$expression<-'NA'
for(i in 1:nrow(high_conf_tab)){
if(!(high_conf_tab$ID[i] %in% twas$ID)){
high_conf_tab$expression[i]<-'NA'
} else {
if(!(high_conf_tab$ID[i] %in% expression_highconf_res$ID)){
high_conf_tab$expression[i]<-'Non-Sig'
} else {
if(expression_highconf_res$`SNP-weight Set`[expression_highconf_res$ID == high_conf_tab$ID[i]] == "CMC DLPFC Splicing"){
high_conf_tab$expression[i]<-'Sig'
} else {
if(expression_highconf_res$TWAS.Z[expression_highconf_res$ID == high_conf_tab$ID[i]] > 0){
high_conf_tab$expression[i]<-'Pos'
} else {
high_conf_tab$expression[i]<-'Neg'
}
}
}
}
}
# protein
high_conf_tab$protein<-'NA'
for(i in 1:nrow(high_conf_tab)){
if(!(high_conf_tab$ENSGID[i] %in% pwas$ID)){
high_conf_tab$protein[i]<-'NA'
} else {
if(!(high_conf_tab$ID[i] %in% pwas_smr_rosmap_banner$ID[which(pwas_smr_rosmap_banner$banner_replicated & pwas_smr_rosmap_banner$rosmap.causal & pwas_smr_rosmap_banner$smr.causal)])){
high_conf_tab$protein[i]<-'Non-Sig'
} else {
if(pwas_smr_rosmap_banner$rosmap.TWAS.Z[pwas_smr_rosmap_banner$ID == high_conf_tab$ID[i]] > 0){
high_conf_tab$protein[i]<-'Pos'
} else {
high_conf_tab$protein[i]<-'Neg'
}
}
}
}
# fastBAT
high_conf_tab$fastBAT<-'NA'
for(i in 1:nrow(high_conf_tab)){
if(high_conf_tab$ENSGID[i] %in% fastbat$Gene){
if(high_conf_tab$ENSGID[i] %in% fastbat$Gene[fastbat$Pvalue < 2e-6]){
high_conf_tab$fastBAT[i]<-'Sig'
} else {
high_conf_tab$fastBAT[i]<-'Non-Sig'
}
}
}
# hmagma
high_conf_tab$hmagma<-'NA'
for(i in 1:nrow(high_conf_tab)){
if(high_conf_tab$ENSGID[i] %in% hmagma_noAstr$GENE){
if(high_conf_tab$ENSGID[i] %in% hmagma_noAstr$GENE[hmagma_noAstr$P.FDR < 0.05]){
high_conf_tab$hmagma[i]<-'Sig'
} else {
high_conf_tab$hmagma[i]<-'Non-Sig'
}
}
}
# PsyOPS
high_conf_tab$psyops<-'NA'
for(i in 1:nrow(high_conf_tab)){
if(high_conf_tab$ENSGID[i] %in% psyops_genes$ensembl_gene_id){
if(high_conf_tab$ENSGID[i] %in% psyops_prioritised$ensembl_gene_id){
high_conf_tab$psyops[i]<-'Sig'
} else {
high_conf_tab$psyops[i]<-'Non-Sig'
}
}
}
# Order genes by the number of analyses indicating them as high confidence.
high_conf_tab_log<-high_conf_tab[,c(-1, -2)]
high_conf_tab_log[high_conf_tab_log == 'NA']<-'F'
high_conf_tab_log[high_conf_tab_log == 'Sig']<-'T'
high_conf_tab_log[high_conf_tab_log == 'Non-Sig']<-'F'
high_conf_tab_log[high_conf_tab_log == 'Pos']<-'T'
high_conf_tab_log[high_conf_tab_log == 'Neg']<-'T'
high_conf_tab_log<-data.frame(sapply( high_conf_tab_log, as.logical))
high_conf_tab_log[is.na(high_conf_tab_log)]<-T
high_conf_tab_log$sum<-rowSums(high_conf_tab_log[,1:3])
high_conf_tab_ordered <-high_conf_tab[order(-high_conf_tab_log$sum, high_conf_tab$ID),-1]
high_conf_tab_ordered$ID<-factor(high_conf_tab_ordered$ID)
names(high_conf_tab_ordered)<-c('ID','Fine-mapping','Expression','Protein','fastBAT','H-MAGMA','PsyOPS')
high_conf_tab_melt<-melt(as.data.table(high_conf_tab_ordered), id.vars='ID')
high_conf_tab_melt$value<-factor(high_conf_tab_melt$value, levels=c('Non-Sig','Sig','Pos','Neg','NA'))
high_conf_tab_melt$ID<-factor(high_conf_tab_melt$ID, levels=rev(levels(high_conf_tab_ordered$ID)))
library(ggplot2)
library(cowplot)
png(here::here('docs/figures/gene_con_heatmap.png'), units = 'px', res=300, height=14000, width=2800)
ggplot(data = high_conf_tab_melt, aes(x='1', y = ID, fill=value)) +
theme_minimal_grid(color = "white") +
geom_tile(colour = 'black', width=1.5) +
scale_fill_manual(values=c('#FFFFFF','#33FF33','#FF6666','#3399FF','#CCCCCC'), drop=F) +
theme(axis.text.x = element_text(angle = 45, hjust = 1),plot.title = element_text(hjust = 0.5)) +
labs(x ='', y = "", title='', fill='') +
facet_grid(~ variable) +
theme(axis.title.x=element_blank(),
axis.text.x=element_blank(),
axis.ticks.x=element_blank(),
strip.background = element_rect(color="black", fill="grey95", size=0.1, linetype="solid"))
## Warning: The `size` argument of `element_rect()` is deprecated as of ggplot2 3.4.0.
## ℹ Please use the `linewidth` argument instead.
dev.off()
## png
## 2
Show heatmap of high confidence associations
High-confidence genes
Gene table
Make a simple table of gene-by-method associations.
methods_genes <- dplyr::bind_rows(lapply(gene_overlap, function(x) data.frame(ENSID=x)), .id='method')
methods_genes$assoc = TRUE
genes_methods_wide <-
tidyr::pivot_wider(dplyr::distinct(methods_genes), names_from='method', values_from='assoc', values_fill=FALSE)
genes_methods_gene_names <-
dplyr::left_join(genes_methods_wide,
dplyr::select(Genes, ENSID=ensembl_gene_id, Gene=external_gene_name,
Chrom_b37=chromosome_name.37, pos_start_b37=start_position.37, pos_end_b37=end_position.37,
Chrom_b38=chromosome_name.38, pos_start_b38=start_position.38, pos_end_b38=end_position.38), by='ENSID')
genes_methods_cpid <- dplyr::select(genes_methods_gene_names, ENSID, Gene,
Chrom_b37, pos_start_b37, pos_end_b37, Chrom_b38, pos_start_b38, pos_end_b38,
Nearest_gene=`Nearest Gene`, Fine_mapping=`Fine-mapping`, Expression, Protein, fastBAT, HMAGMA=`H-MAGMA`, PsyOPS)
genes_methods_cpid$chrom <- with(genes_methods_cpid, dplyr::coalesce(Chrom_b37, Chrom_b38))
genes_methods_cpid$chrom <- with(genes_methods_cpid, ifelse(chrom == 'X', yes=23, no=as.numeric(chrom)))
genes_methods_conf <-
dplyr::arrange(
dplyr::filter(genes_methods_cpid, Nearest_gene | Fine_mapping | Expression | Protein | fastBAT | HMAGMA | PsyOPS),
chrom, pos_start_b37, pos_start_b38
)
genes_methods_conf$chrom <- NULL
genes_methods_conf$Chrom_b38 <- paste0('chr', genes_methods_conf$Chrom_b38)
genes_methods_conf <-
dplyr::select(genes_methods_conf, ENSID, Gene,
Nearest_gene, Fine_mapping, Expression, Protein, fastBAT, HMAGMA, PsyOPS,
ends_with('b37'), ends_with('b38'))
readr::write_tsv(genes_methods_conf, here::here('docs/tables/genes_methods.tsv'), na='')
This will give some indication of how fine-mapped variants may affect gene expression and protein levels, and may also give clarity for associations that have a different direction of effect in the TWAS and PWAS (which is still the case for the GOPC gene).
Show code
###
# Create a dataframe containing gene ID, PANEL, Method and Z scores for all expression and protein analyses.
###
all_func_res<-NULL
# TWAS
twas_tmp<-twas[,c('PANEL','ID','TWAS.Z'), with=F]
twas_tmp$Sig<-twas$TWAS.P < 3.685926e-08
twas_tmp$Coloc<-twas$COLOC.PP4 > 0.8
names(twas_tmp)<-c('Panel','ID','Z','Sig','Coloc')
twas_tmp$Method<-'FUSION'
twas_tmp$Type<-'Expr.'
twas_tmp$Type[grepl('SPLIC',twas_tmp$Panel)]<-'Splice'
# Retain only the most significant assoc for each gene within PANEL (only relevent for splice panel)
twas_tmp<-twas_tmp[order(-abs(twas_tmp$Z)),]
twas_tmp<-twas_tmp[!duplicated(paste0(twas_tmp$Panel, twas_tmp$ID)),]
twas_tmp$Tissue<-NA
twas_tmp$Tissue[!grepl('Adrenal|BLOOD|Blood|Thyroid',twas_tmp$Panel)]<-'Brain'
twas_tmp$Tissue[grepl('BLOOD|Blood',twas_tmp$Panel)]<-'Blood'
twas_tmp$Tissue[grepl('Adrenal|Thyroid',twas_tmp$Panel)]<-'HPA/HPT'
twas_tmp<-merge(twas_tmp, focus_sig[,c('ID','PANEL','FOCUS_pip')], by.x=c('Panel','ID'), by.y=c('PANEL','ID'), all.x=T)
twas_tmp$FOCUS[twas_tmp$FOCUS_pip > 0.5]<-T
twas_tmp$FOCUS[twas_tmp$FOCUS_pip < 0.5 | is.na(twas_tmp$FOCUS_pip)]<-F
twas_tmp<-twas_tmp[order(-twas_tmp$FOCUS_pip),]
twas_tmp<-twas_tmp[!duplicated(paste0(twas_tmp$Panel, twas_tmp$ID)),]
twas_tmp$FOCUS_pip<-NULL
all_func_res<-rbind(all_func_res, twas_tmp)
# SMR expression
smr_res_dat$Z<-smr_res_dat$b_SMR/smr_res_dat$se_SMR
smr_res_dat$Sig<-smr_res_dat$p_SMR_fdr_all < 0.05
smr_res_dat$Coloc<-smr_res_dat$p_HEIDI > 0.05
smr_tmp<-smr_res_dat[,c('eQTL_source','HGNCName','Z','Sig','Coloc'), with=F]
names(smr_tmp)<-c('Panel','ID','Z','Sig','Coloc')
smr_tmp$Method<-'SMR'
smr_tmp$Type<-'Expr.'
smr_tmp$Tissue<-NA
smr_tmp$Tissue[!grepl('eQTLGen_Blood',smr_tmp$Panel)]<-'Brain'
smr_tmp$Tissue[grepl('eQTLGen_Blood',smr_tmp$Panel)]<-'Blood'
smr_tmp$FOCUS<-F
all_func_res<-rbind(all_func_res, smr_tmp)
# PWAS
pwas_smr_rosmap_banner$rosmap.sig<-T
pwas_rosmap_tmp<-pwas_smr_rosmap_banner[,c('ID','rosmap.TWAS.Z','rosmap.sig','rosmap.causal'), with=F]
pwas_rosmap_tmp$PANEL<-'ROSMAP DLPFC'
pwas_rosmap_tmp<-pwas_rosmap_tmp[,c('PANEL','ID','rosmap.TWAS.Z','rosmap.sig','rosmap.causal'), with=F]
names(pwas_rosmap_tmp)<-c('Panel','ID','Z','Sig','Coloc')
pwas_rosmap_tmp<-pwas_rosmap_tmp[order(-abs(pwas_rosmap_tmp$Z)),]
pwas_rosmap_tmp<-pwas_rosmap_tmp[!duplicated(paste0(pwas_rosmap_tmp$Panel, pwas_rosmap_tmp$ID)),]
pwas_rosmap_tmp$Method<-'FUSION'
pwas_rosmap_tmp$Type<-'Protein'
pwas_rosmap_tmp$Tissue<-'Brain'
pwas_rosmap_tmp$FOCUS<-F
all_func_res<-rbind(all_func_res, pwas_rosmap_tmp)
pwas_banner_tmp<-pwas_smr_rosmap_banner[,c('ID','banner.TWAS.Z','banner_replicated','banner.causal'), with=F]
pwas_banner_tmp$PANEL<-'Banner et al. DLPFC'
pwas_banner_tmp<-pwas_banner_tmp[,c('PANEL','ID','banner.TWAS.Z','banner_replicated','banner.causal'), with=F]
names(pwas_banner_tmp)<-c('Panel','ID','Z','Sig','Coloc')
pwas_banner_tmp<-pwas_banner_tmp[order(-abs(pwas_banner_tmp$Z)),]
pwas_banner_tmp<-pwas_banner_tmp[!duplicated(paste0(pwas_banner_tmp$Panel, pwas_banner_tmp$ID)),]
pwas_banner_tmp$Method<-'FUSION'
pwas_banner_tmp$Type<-'Protein'
pwas_banner_tmp$Tissue<-'Brain'
pwas_banner_tmp$FOCUS<-F
all_func_res<-rbind(all_func_res, pwas_banner_tmp)
# SMR protein
pwas_smr_rosmap_banner$z_SMR<-abs(qnorm(pwas_smr_rosmap_banner$p_SMR/2))
pwas_smr_rosmap_banner$z_SMR<-sign(pwas_smr_rosmap_banner$b_SMR)*pwas_smr_rosmap_banner$z_SMR
smr_rosmap_tmp<-pwas_smr_rosmap_banner[,c('ID','z_SMR','smr_replicated','smr.causal'), with=F]
smr_rosmap_tmp$PANEL<-'ROSMAP DLPFC'
smr_rosmap_tmp<-smr_rosmap_tmp[,c('PANEL','ID','z_SMR','smr_replicated','smr.causal'), with=F]
names(smr_rosmap_tmp)<-c('Panel','ID','Z','Sig','Coloc')
smr_rosmap_tmp<-smr_rosmap_tmp[order(-abs(smr_rosmap_tmp$Z)),]
smr_rosmap_tmp<-smr_rosmap_tmp[!duplicated(paste0(smr_rosmap_tmp$Panel, smr_rosmap_tmp$ID)),]
smr_rosmap_tmp$Method<-'SMR'
smr_rosmap_tmp$Type<-'Protein'
smr_rosmap_tmp$Tissue<-'Brain'
smr_rosmap_tmp$FOCUS<-F
all_func_res<-rbind(all_func_res, smr_rosmap_tmp)
# Subset to high confidence genes
high_conf_id<-Genes$external_gene_name[Genes$ensembl_gene_id %in% high_conf]
all_func_res<-all_func_res[all_func_res$ID %in% high_conf_id,]
# Remove missing values
all_func_res<-all_func_res[complete.cases(all_func_res),]
all_func_res$Group<-paste0(all_func_res$Tissue,'\n',all_func_res$Method,'\n',all_func_res$Type )
all_func_res$Group<-factor(all_func_res$Group, levels=c("Brain\nFUSION\nExpr.","Brain\nFUSION\nSplice","Brain\nSMR\nExpr.","Brain\nFUSION\nProtein","Brain\nSMR\nProtein","Blood\nFUSION\nExpr.","Blood\nSMR\nExpr.","HPA/HPT\nFUSION\nExpr."))
all_func_res$ID<-factor(all_func_res$ID, levels=rev(levels(high_conf_tab_ordered$ID)))
all_func_res$Panel[all_func_res$Panel == "Adrenal_Gland"]<-'GTeX Adrenal Gland'
all_func_res$Panel[all_func_res$Panel == "Brain_Cerebellar_Hemisphere"]<-'GTeX Cerebellar Hemisphere'
all_func_res$Panel[all_func_res$Panel == "Brain_Cerebellum"]<-'GTeX Cerebellum'
all_func_res$Panel[all_func_res$Panel == "Brain_Hypothalamus"]<-'GTeX Hypothalamus'
all_func_res$Panel[all_func_res$Panel == "CMC.BRAIN.RNASEQ"]<-'CMC DLPFC'
all_func_res$Panel[all_func_res$Panel == "CMC.BRAIN.RNASEQ_SPLICING"]<-'CMC DLPFC'
all_func_res$Panel[all_func_res$Panel == "Pituitary"]<-'GTeX Pituitary'
all_func_res$Panel[all_func_res$Panel == "PsychENCODE"]<-'PsychENCODE DLPFC'
all_func_res$Panel[all_func_res$Panel == "Whole_Blood"]<-'GTeX'
all_func_res$Panel[all_func_res$Panel == "NTR.BLOOD.RNAARR"]<-'NTR'
all_func_res$Panel[all_func_res$Panel == "Thyroid"]<-'GTeX Thyroid'
all_func_res$Panel[all_func_res$Panel == "Brain_Caudate_basal_ganglia"]<-'GTeX Caudate basalganglia'
all_func_res$Panel[all_func_res$Panel == "YFS.BLOOD.RNAARR"]<-'YFS'
all_func_res$Panel[all_func_res$Panel == "Brain_Cortex"]<-'GTeX Cortex'
all_func_res$Panel[all_func_res$Panel == "Brain_Frontal_Cortex_BA9"]<-'GTeX Frontal Cortex BA9'
all_func_res$Panel[all_func_res$Panel == "Brain_Hippocampus"]<-'GTeX Hippocampus'
all_func_res$Panel[all_func_res$Panel == "Brain_Amygdala"]<-'GTeX Amygdala'
all_func_res$Panel[all_func_res$Panel == "Brain_Anterior_cingulate_cortex_BA24"]<-'GTeX Anterior cingulate cortex BA24'
all_func_res$Panel[all_func_res$Panel == "Brain_Substantia_nigra"]<-'GTeX Substantia nigra'
all_func_res$Panel[all_func_res$Panel == "Brain_Nucleus_accumbens_basal_ganglia"]<-'GTeX Nucleus accumbens basalganglia'
all_func_res$Panel[all_func_res$Panel == "Brain_Putamen_basal_ganglia"]<-'GTeX Nucleus Putamen basalganglia'
all_func_res$Panel[all_func_res$Panel == "eQTLGen_Blood"]<-'eQTLGen'
all_func_res$Panel[all_func_res$Panel == "MetaBrain_Cerebellum"]<-'MetaBrain Cerebellum'
all_func_res$Panel[all_func_res$Panel == "MetaBrain_Basalganglia"]<-'MetaBrain Basalganglia'
all_func_res$Panel[all_func_res$Panel == "MetaBrain_Cortex"]<-'MetaBrain Cortex'
all_func_res$Panel[all_func_res$Panel == "MetaBrain_Hippocampus"]<-'MetaBrain Hippocampus'
all_func_res$Panel<-factor(all_func_res$Panel, levels=c("GTeX Adrenal Gland" ,"GTeX Amygdala" ,"GTeX Anterior cingulate cortex BA24" ,"GTeX Caudate basalganglia" ,"GTeX Cerebellar Hemisphere" ,"GTeX Cerebellum" ,"GTeX Cortex" ,"GTeX Frontal Cortex BA9" ,"GTeX Hippocampus" ,"GTeX Hypothalamus", "GTeX Nucleus accumbens basalganglia","GTeX Nucleus Putamen basalganglia" ,"GTeX Pituitary",'GTeX Substantia nigra' ,"GTeX Thyroid","CMC DLPFC", "PsychENCODE DLPFC", "GTeX" ,"NTR" ,"YFS", "eQTLGen" ,'MetaBrain Basalganglia',"MetaBrain Cerebellum" ,"MetaBrain Cortex" , 'MetaBrain Hippocampus',"ROSMAP DLPFC" ,"Banner et al. DLPFC"))
# Create heatmap
library(ggplot2)
heatmap<-ggplot(data = all_func_res, aes(x = Panel, y = ID)) +
theme_bw() +
geom_tile(aes(fill = Z), width=0.95, height=0.95) +
geom_tile(data=all_func_res[all_func_res$Sig == T,], aes(x = Panel, y = ID), colour='black', fill=NA, size=0.3, width=0.95, height=0.95) +
geom_tile(data=all_func_res[all_func_res$Coloc ==T & all_func_res$Sig == T,], aes(x = Panel, y = ID), colour='green2', fill=NA, size=0.3, width=0.95, height=0.95) +
geom_tile(data=all_func_res[all_func_res$Coloc ==T & all_func_res$Sig == T & all_func_res$FOCUS == T,], aes(x = Panel, y = ID), colour='magenta2', fill=NA, size=0.3, width=0.95, height=0.95) +
scale_fill_gradientn(colours=c("dodgerblue2","white","red"), na.value = 'white',name = "Z-score") +
theme(axis.text.x = element_text(angle = 45, hjust = 1),plot.title = element_text(hjust = 0.5)) +
geom_text(aes(label=round(Z,1)), color="black", size=3) +
labs(title="High confidence genes across panels and methods") +
facet_wrap(~ Group , nrow=1, scales = "free_x")
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
group_siz<-NULL
for(i in c("Brain\nFUSION\nExpr.","Brain\nFUSION\nSplice","Brain\nSMR\nExpr.","Brain\nFUSION\nProtein","Brain\nSMR\nProtein","Blood\nFUSION\nExpr.","Blood\nSMR\nExpr.","HPA/HPT\nFUSION\nExpr.")){
group_siz<-rbind(group_siz, data.frame(Group=i,
Size=length(unique(all_func_res$Panel[all_func_res$Group==i]))))
}
# Set minimum size to 3 to allow space for labels
group_siz$Size[group_siz$Size < 2]<-2
group_siz$Prop<-group_siz$Size/sum(group_siz$Size)
group_siz$Width<-4*group_siz$Prop
library(grid)
gt = ggplot_gtable(ggplot_build(heatmap))
for(i in 1:nrow(group_siz)){
gt$widths[gt$layout$l[grep(paste0('panel-',i,'-1'), gt$layout$name)]] = group_siz$Width[i]*gt$widths[gt$layout$l[grep(paste0('panel-',i,'-1'), gt$layout$name)]]
}
png('../../docs/figures/gene_con_func_heatmap.png', units = 'px', res=300, height=8000, width=3200)
grid.draw(gt)
a<-dev.off()
Show heatmap of high confidence associations across expression and protein datasets and methods
High-confidence genes
Conduct an overrepresentation test with PANTHER using the PANTHER API. The service returns results as JSON. The first step is querying the IDs for humans (the taxon) and the annotation datasets.
Show code
library(httr)
library(jsonlite)
# the PantherDB URL
panther_db <- "http://pantherdb.org"
# Get list of taxon IDs
supportedgenomes_response <- GET(modify_url(panther_db, path="/services/oai/pantherdb/supportedgenomes"))
# find the taxon ID for humans
supportedgenomes <- fromJSON(content(supportedgenomes_response, "text"))
genomes <- supportedgenomes$search$output$genomes$genome
human_taxon_id <- genomes$taxon_id[which(genomes$name == 'human')]
# get list of annotation datasets
supportedannotdatasets_response <- GET(modify_url(panther_db, path="services/oai/pantherdb/supportedannotdatasets"))
# find GO ids for biological process, molecular function, and cellular components
supportedannotdatasets <- fromJSON(content(supportedannotdatasets_response, "text"))
annotation_data_types <- supportedannotdatasets$search$annotation_data_sets$annotation_data_type
biological_process_id = annotation_data_types$id[which(annotation_data_types$label == "biological_process")]
cellular_component_id = annotation_data_types$id[which(annotation_data_types$label == "cellular_component")]
molecular_function_id = annotation_data_types$id[which(annotation_data_types$label == "molecular_function")]
The next step is to query the overrepresentation test.
Show code
# construct enrichment overrepresentation query from gene list
# and annotation ID
overrep_url <- function(gene_list, annot_data_set_id, url=panther_db, organism_id=human_taxon_id) {
modify_url(url,
path="/services/oai/pantherdb/enrich/overrep",
query=list(geneInputList=paste(gene_list, collapse=","),
organism=organism_id,
annotDataSet=annot_data_set_id,
enrichmentTestType="FISHER",
correction="FDR"))
}
# construct URL and query PANTHER. Parse out JSON response
overrep_query <- function(genes, annot_data_set_id, ...) {
# make query
overrep_response <- GET(overrep_url(genes, annot_data_set_id, ...))
# parse JSON
overrep <- fromJSON(content(overrep_response, "text"))
return(overrep)
}
high_conf_overrep_biol <- overrep_query(high_conf, biological_process_id)
high_conf_overrep_mol <- overrep_query(high_conf, molecular_function_id)
high_conf_overrep_cell<- overrep_query(high_conf, cellular_component_id)
Show code
# extract results table from the query
panther_format <- function(query) {
results <- query$results$result
results_labels <- results$term
results_terms <- cbind(results_labels,
results[,c('fold_enrichment', 'fdr',
'number_in_list', 'expected',
'number_in_reference', 'pValue')])
return(results_terms)
}
high_conf_overrep_biol_results <- panther_format(high_conf_overrep_biol)
high_conf_overrep_mol_results <- panther_format(high_conf_overrep_mol)
high_conf_overrep_cell_results <- panther_format(high_conf_overrep_cell)
Show Go: Biological Processes table
# filter for FDR
knitr::kable(high_conf_overrep_biol_results[which(high_conf_overrep_biol_results$fdr <= 0.05),], caption='GO: Biological Processes')
| id | label | fold_enrichment | fdr | number_in_list | expected | number_in_reference | pValue |
|---|---|---|---|---|---|---|---|
| GO:0007399 | nervous system development | 2.7177826 | 0.0000000 | 59 | 21.7088737 | 2191 | 0.0000000 |
| GO:0007275 | multicellular organism development | 1.8858068 | 0.0000249 | 79 | 41.8918840 | 4228 | 0.0000000 |
| GO:0048731 | system development | 1.9459507 | 0.0000206 | 74 | 38.0276847 | 3838 | 0.0000000 |
| GO:0048856 | anatomical structure development | 1.7069601 | 0.0002165 | 87 | 50.9677983 | 5144 | 0.0000001 |
| GO:0032502 | developmental process | 1.6355884 | 0.0003225 | 92 | 56.2488708 | 5677 | 0.0000001 |
| GO:0007417 | central nervous system development | 2.9282342 | 0.0004811 | 30 | 10.2450823 | 1034 | 0.0000002 |
| GO:0022008 | neurogenesis | 2.5818399 | 0.0015368 | 33 | 12.7815824 | 1290 | 0.0000007 |
| GO:0065008 | regulation of biological quality | 1.8115711 | 0.0015966 | 66 | 36.4324639 | 3677 | 0.0000008 |
| GO:0032501 | multicellular organismal process | 1.5182678 | 0.0019559 | 99 | 65.2058866 | 6581 | 0.0000011 |
| GO:0007420 | brain development | 3.1254649 | 0.0018301 | 24 | 7.6788576 | 775 | 0.0000012 |
| GO:0050807 | regulation of synapse organization | 5.7672269 | 0.0029989 | 12 | 2.0807227 | 210 | 0.0000021 |
| GO:0099537 | trans-synaptic signaling | 3.9808585 | 0.0029142 | 17 | 4.2704357 | 431 | 0.0000022 |
| GO:0007416 | synapse assembly | 8.4105392 | 0.0028188 | 9 | 1.0700860 | 108 | 0.0000023 |
| GO:0048523 | negative regulation of cellular process | 1.6422946 | 0.0026664 | 77 | 46.8856185 | 4732 | 0.0000024 |
| GO:0120035 | regulation of plasma membrane bounded cell projection organization | 3.3116498 | 0.0024947 | 21 | 6.3412502 | 640 | 0.0000024 |
| GO:0050803 | regulation of synapse structure or activity | 5.6070261 | 0.0027195 | 12 | 2.1401719 | 216 | 0.0000028 |
| GO:0050808 | synapse organization | 4.6942544 | 0.0027578 | 14 | 2.9823692 | 301 | 0.0000030 |
| GO:0060322 | head development | 2.9431778 | 0.0027913 | 24 | 8.1544514 | 823 | 0.0000032 |
| GO:0031344 | regulation of cell projection organization | 3.2308779 | 0.0028593 | 21 | 6.4997814 | 656 | 0.0000035 |
| GO:0048699 | generation of neurons | 2.5810120 | 0.0027974 | 29 | 11.2359027 | 1134 | 0.0000036 |
| GO:0009653 | anatomical structure morphogenesis | 2.0302598 | 0.0032861 | 45 | 22.1646510 | 2237 | 0.0000044 |
| GO:0032879 | regulation of localization | 2.0626608 | 0.0032979 | 43 | 20.8468600 | 2104 | 0.0000046 |
| GO:0099536 | synaptic signaling | 3.7218004 | 0.0036071 | 17 | 4.5676818 | 461 | 0.0000053 |
| GO:0048812 | neuron projection morphogenesis | 3.5819415 | 0.0056113 | 17 | 4.7460294 | 479 | 0.0000086 |
| GO:0120039 | plasma membrane bounded cell projection morphogenesis | 3.5449380 | 0.0061371 | 17 | 4.7955705 | 484 | 0.0000098 |
| GO:0034762 | regulation of transmembrane transport | 3.2337318 | 0.0061707 | 19 | 5.8755646 | 593 | 0.0000102 |
| GO:0048858 | cell projection morphogenesis | 3.5086912 | 0.0064625 | 17 | 4.8451115 | 489 | 0.0000111 |
| GO:0000902 | cell morphogenesis | 2.9725889 | 0.0066603 | 21 | 7.0645490 | 713 | 0.0000119 |
| GO:0048513 | animal organ development | 1.7679191 | 0.0064534 | 57 | 32.2412939 | 3254 | 0.0000119 |
| GO:0010975 | regulation of neuron projection development | 3.6288169 | 0.0071358 | 16 | 4.4091505 | 445 | 0.0000137 |
| GO:0031346 | positive regulation of cell projection organization | 4.0255572 | 0.0080729 | 14 | 3.4777794 | 351 | 0.0000160 |
| GO:0048519 | negative regulation of biological process | 1.5383975 | 0.0086864 | 81 | 52.6521929 | 5314 | 0.0000177 |
| GO:0032990 | cell part morphogenesis | 3.3774606 | 0.0084894 | 17 | 5.0333673 | 508 | 0.0000179 |
| GO:0034765 | regulation of ion transmembrane transport | 3.3642157 | 0.0086489 | 17 | 5.0531837 | 510 | 0.0000188 |
| GO:0098742 | cell-cell adhesion via plasma-membrane adhesion molecules | 4.5360212 | 0.0095777 | 12 | 2.6454903 | 267 | 0.0000214 |
| GO:0007268 | chemical synaptic transmission | 3.6567562 | 0.0102105 | 15 | 4.1019962 | 414 | 0.0000234 |
| GO:0098916 | anterograde trans-synaptic signaling | 3.6567562 | 0.0099346 | 15 | 4.1019962 | 414 | 0.0000234 |
| GO:0050890 | cognition | 4.1389404 | 0.0100762 | 13 | 3.1409005 | 317 | 0.0000244 |
| GO:0051252 | regulation of RNA metabolic process | 1.6677615 | 0.0101049 | 62 | 37.1755792 | 3752 | 0.0000251 |
| GO:0043269 | regulation of ion transport | 2.8510302 | 0.0134991 | 20 | 7.0150080 | 708 | 0.0000344 |
| GO:0010976 | positive regulation of neuron projection development | 5.8602467 | 0.0137779 | 9 | 1.5357715 | 155 | 0.0000360 |
| GO:0098609 | cell-cell adhesion | 3.1597606 | 0.0150420 | 17 | 5.3801545 | 543 | 0.0000403 |
| GO:0030182 | neuron differentiation | 2.4639326 | 0.0153139 | 26 | 10.5522366 | 1065 | 0.0000420 |
| GO:0051049 | regulation of transport | 2.0573912 | 0.0153786 | 36 | 17.4978872 | 1766 | 0.0000432 |
| GO:0050804 | modulation of chemical synaptic transmission | 3.4563860 | 0.0152459 | 15 | 4.3397931 | 438 | 0.0000438 |
| GO:0019219 | regulation of nucleobase-containing compound metabolic process | 1.6130368 | 0.0149637 | 65 | 40.2966633 | 4067 | 0.0000439 |
| GO:0099177 | regulation of trans-synaptic signaling | 3.4485127 | 0.0149671 | 15 | 4.3497013 | 439 | 0.0000449 |
| GO:0007610 | behavior | 2.9978160 | 0.0150994 | 18 | 6.0043713 | 606 | 0.0000462 |
| GO:0034330 | cell junction organization | 3.2426175 | 0.0162541 | 16 | 4.9342853 | 498 | 0.0000508 |
| GO:0032409 | regulation of transporter activity | 4.0505607 | 0.0192586 | 12 | 2.9625528 | 299 | 0.0000614 |
| GO:0010468 | regulation of gene expression | 1.5383231 | 0.0190754 | 74 | 48.1043276 | 4855 | 0.0000620 |
| GO:0007156 | homophilic cell adhesion via plasma membrane adhesion molecules | 5.3431661 | 0.0214559 | 9 | 1.6843946 | 170 | 0.0000712 |
| GO:1900244 | positive regulation of synaptic vesicle endocytosis | 50.4632353 | 0.0221424 | 3 | 0.0594492 | 6 | 0.0000748 |
| GO:0050793 | regulation of developmental process | 1.8247052 | 0.0248940 | 45 | 24.6615183 | 2489 | 0.0000857 |
| GO:0001764 | neuron migration | 5.8087177 | 0.0296578 | 8 | 1.3772403 | 139 | 0.0001040 |
| GO:0032412 | regulation of ion transmembrane transporter activity | 4.0815852 | 0.0326857 | 11 | 2.6950313 | 272 | 0.0001167 |
| GO:0048666 | neuron development | 2.5141825 | 0.0343739 | 21 | 8.3526155 | 843 | 0.0001249 |
| GO:0007611 | learning or memory | 4.0370588 | 0.0346216 | 11 | 2.7247559 | 275 | 0.0001281 |
| GO:0050794 | regulation of cellular process | 1.2450034 | 0.0342498 | 138 | 110.8430715 | 11187 | 0.0001289 |
| GO:2001257 | regulation of cation channel activity | 4.9099364 | 0.0343715 | 9 | 1.8330176 | 185 | 0.0001315 |
| GO:0034329 | cell junction assembly | 4.0224318 | 0.0339417 | 11 | 2.7346641 | 276 | 0.0001320 |
| GO:0032989 | cellular component morphogenesis | 2.8406457 | 0.0358634 | 17 | 5.9845549 | 604 | 0.0001418 |
| GO:1903423 | positive regulation of synaptic vesicle recycling | 37.8474265 | 0.0360630 | 3 | 0.0792656 | 8 | 0.0001449 |
| NA | UNCLASSIFIED | 0.3703724 | 0.0371666 | 10 | 26.9998543 | 2725 | 0.0001517 |
| GO:0008150 | biological_process | 1.0960443 | 0.0365948 | 194 | 177.0001457 | 17864 | 0.0001517 |
| GO:0022898 | regulation of transmembrane transporter activity | 3.9368481 | 0.0375844 | 11 | 2.7941134 | 282 | 0.0001582 |
| GO:0048667 | cell morphogenesis involved in neuron differentiation | 3.2112968 | 0.0388950 | 14 | 4.3596095 | 440 | 0.0001662 |
| GO:0007612 | learning | 5.2772011 | 0.0449656 | 8 | 1.5159551 | 153 | 0.0001950 |
| GO:0048522 | positive regulation of cellular process | 1.4418067 | 0.0455058 | 81 | 56.1795133 | 5670 | 0.0002002 |
| GO:2000311 | regulation of AMPA receptor activity | 15.5271493 | 0.0452179 | 4 | 0.2576133 | 26 | 0.0002019 |
| GO:0050789 | regulation of biological process | 1.2224704 | 0.0454834 | 143 | 116.9762495 | 11806 | 0.0002059 |
| GO:0044057 | regulation of system process | 2.8429992 | 0.0481551 | 16 | 5.6278595 | 568 | 0.0002211 |
Show Go: Molecular functions table
# filter for FDR
knitr::kable(high_conf_overrep_mol_results[which(high_conf_overrep_mol_results$fdr <= 0.05),], caption='GO: Molecular Functions')
| id | label | fold_enrichment | fdr | number_in_list | expected | number_in_reference | pValue |
|---|
Show Go: Cellular Components table
# filter for FDR
knitr::kable(high_conf_overrep_cell_results[which(high_conf_overrep_cell_results$fdr <= 0.05),], caption='GO: Cellular Components')
| id | label | fold_enrichment | fdr | number_in_list | expected | number_in_reference | pValue |
|---|---|---|---|---|---|---|---|
| GO:0043005 | neuron projection | 3.620031 | 0.0000000 | 50 | 13.8120356 | 1394 | 0.0000000 |
| GO:0045202 | synapse | 3.562111 | 0.0000000 | 48 | 13.4751566 | 1360 | 0.0000000 |
| GO:0036477 | somatodendritic compartment | 4.102702 | 0.0000000 | 35 | 8.5309631 | 861 | 0.0000000 |
| GO:0030425 | dendrite | 4.813663 | 0.0000000 | 30 | 6.2322599 | 629 | 0.0000000 |
| GO:0097447 | dendritic tree | 4.798406 | 0.0000000 | 30 | 6.2520764 | 631 | 0.0000000 |
| GO:0098794 | postsynapse | 4.514283 | 0.0000000 | 28 | 6.2025353 | 626 | 0.0000000 |
| GO:0030054 | cell junction | 2.551512 | 0.0000000 | 54 | 21.1639225 | 2136 | 0.0000000 |
| GO:0030424 | axon | 4.354301 | 0.0000000 | 28 | 6.4304240 | 649 | 0.0000000 |
| GO:0120025 | plasma membrane bounded cell projection | 2.432496 | 0.0000001 | 55 | 22.6105202 | 2282 | 0.0000000 |
| GO:0042995 | cell projection | 2.363815 | 0.0000002 | 56 | 23.6905144 | 2391 | 0.0000000 |
| GO:0098984 | neuron to neuron synapse | 5.371437 | 0.0000011 | 19 | 3.5372286 | 357 | 0.0000000 |
| GO:0098793 | presynapse | 4.213249 | 0.0000043 | 22 | 5.2216232 | 527 | 0.0000000 |
| GO:0030426 | growth cone | 7.763575 | 0.0000049 | 13 | 1.6744864 | 169 | 0.0000000 |
| GO:0030427 | site of polarized growth | 7.497395 | 0.0000066 | 13 | 1.7339356 | 175 | 0.0000000 |
| GO:0014069 | postsynaptic density | 5.263037 | 0.0000073 | 17 | 3.2300743 | 326 | 0.0000001 |
| GO:0032279 | asymmetric synapse | 5.167922 | 0.0000087 | 17 | 3.2895235 | 332 | 0.0000001 |
| GO:0099572 | postsynaptic specialization | 4.944524 | 0.0000150 | 17 | 3.4381466 | 347 | 0.0000001 |
| GO:0098978 | glutamatergic synapse | 4.806022 | 0.0000485 | 16 | 3.3291563 | 336 | 0.0000004 |
| GO:0097060 | synaptic membrane | 4.479765 | 0.0000508 | 17 | 3.7948419 | 383 | 0.0000005 |
| GO:0150034 | distal axon | 5.028365 | 0.0001418 | 14 | 2.7842052 | 281 | 0.0000014 |
| GO:0099240 | intrinsic component of synaptic membrane | 6.569179 | 0.0001675 | 11 | 1.6744864 | 169 | 0.0000017 |
| GO:0043197 | dendritic spine | 6.307904 | 0.0002319 | 11 | 1.7438438 | 176 | 0.0000025 |
| GO:0044309 | neuron spine | 6.237029 | 0.0002459 | 11 | 1.7636602 | 178 | 0.0000028 |
| GO:0045211 | postsynaptic membrane | 4.806022 | 0.0004540 | 13 | 2.7049395 | 273 | 0.0000053 |
| GO:0031224 | intrinsic component of membrane | 1.524569 | 0.0004682 | 90 | 59.0330759 | 5958 | 0.0000057 |
| GO:0098839 | postsynaptic density membrane | 8.872657 | 0.0004689 | 8 | 0.9016465 | 91 | 0.0000060 |
| GO:0099699 | integral component of synaptic membrane | 6.428438 | 0.0004657 | 10 | 1.5555879 | 157 | 0.0000062 |
| GO:0099634 | postsynaptic specialization membrane | 6.900955 | 0.0024073 | 8 | 1.1592598 | 117 | 0.0000330 |
| GO:0070161 | anchoring junction | 2.266313 | 0.0029447 | 30 | 13.2373598 | 1336 | 0.0000418 |
| GO:0098797 | plasma membrane protein complex | 2.780343 | 0.0032956 | 20 | 7.1933557 | 726 | 0.0000484 |
| GO:0043025 | neuronal cell body | 3.197670 | 0.0039264 | 16 | 5.0036427 | 505 | 0.0000596 |
| GO:0016021 | integral component of membrane | 1.461946 | 0.0053496 | 84 | 57.4576716 | 5799 | 0.0000838 |
| GO:0005891 | voltage-gated calcium channel complex | 11.214052 | 0.0079306 | 5 | 0.4458692 | 45 | 0.0001281 |
| GO:0044295 | axonal growth cone | 16.148235 | 0.0105927 | 4 | 0.2477051 | 25 | 0.0001763 |
| GO:0098590 | plasma membrane region | 2.167872 | 0.0105836 | 27 | 12.4546117 | 1257 | 0.0001813 |
| GO:0099061 | integral component of postsynaptic density membrane | 9.704468 | 0.0135905 | 5 | 0.5152266 | 52 | 0.0002395 |
| GO:0044297 | cell body | 2.813281 | 0.0136859 | 16 | 5.6873088 | 574 | 0.0002479 |
| GO:0099055 | integral component of postsynaptic membrane | 5.936851 | 0.0133777 | 7 | 1.1790762 | 119 | 0.0002488 |
| GO:0034702 | ion channel complex | 3.688343 | 0.0142453 | 11 | 2.9823692 | 301 | 0.0002719 |
| GO:0099146 | intrinsic component of postsynaptic density membrane | 9.011292 | 0.0168366 | 5 | 0.5548594 | 56 | 0.0003296 |
| GO:0098936 | intrinsic component of postsynaptic membrane | 5.651882 | 0.0164735 | 7 | 1.2385254 | 125 | 0.0003306 |
| GO:0120111 | neuron projection cytoplasm | 6.728431 | 0.0179491 | 6 | 0.8917383 | 90 | 0.0003690 |
| GO:0005794 | Golgi apparatus | 1.946743 | 0.0191728 | 32 | 16.4377095 | 1659 | 0.0004035 |
| GO:0005798 | Golgi-associated vesicle | 6.582161 | 0.0191416 | 6 | 0.9115547 | 92 | 0.0004123 |
| GO:0005886 | plasma membrane | 1.401522 | 0.0208750 | 83 | 59.2213318 | 5977 | 0.0004598 |
| GO:0034703 | cation channel complex | 4.055081 | 0.0226139 | 9 | 2.2194376 | 224 | 0.0005092 |
| GO:0000785 | chromatin | 2.053277 | 0.0279554 | 26 | 12.6626840 | 1278 | 0.0006431 |
| GO:0043198 | dendritic shaft | 10.623839 | 0.0320100 | 4 | 0.3765117 | 38 | 0.0007521 |
| GO:0071944 | cell periphery | 1.359226 | 0.0340637 | 87 | 64.0069940 | 6460 | 0.0008170 |
| GO:0034704 | calcium channel complex | 7.107498 | 0.0371501 | 5 | 0.7034824 | 71 | 0.0009092 |